Merge 424e3a52fe into 3f1ae2e32c

This commit is contained in commit 0e63704b9a.

3 changed files with 453 additions and 294 deletions
ggml-kompute.h:

@@ -11,6 +11,8 @@
 extern "C" {
 #endif
 
+#define GGML_KOMPUTE_MAX_DEVICES 16
+
 struct ggml_vk_device {
     int index;
     int type; // same as VkPhysicalDeviceType
@@ -23,10 +25,10 @@ struct ggml_vk_device {
 };
 
 struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
+int ggml_backend_kompute_get_device_count(void);
+void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total);
 bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
 bool ggml_vk_has_vulkan(void);
-bool ggml_vk_has_device(void);
-struct ggml_vk_device ggml_vk_current_device(void);
 
 //
 // backend API
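For reference, a minimal sketch of how a caller might enumerate devices through this header after the change. This is not part of the commit; it assumes a Kompute-enabled build, and the printf casts to unsigned long long are defensive since the exact widths of maxAlloc and heapSize are not shown in this diff. The returned array is heap-allocated (the llama.cpp hunk below releases it with free()).

// Hypothetical consumer of ggml-kompute.h; not from this commit.
#include <cstdio>
#include <cstdlib>
#include "ggml-kompute.h"

int main() {
    size_t count = 0;
    // 0 = no minimum memory requirement; caller owns the returned array
    ggml_vk_device * devices = ggml_vk_available_devices(0, &count);
    for (size_t i = 0; i < count; i++) {
        printf("#%d %s %s, max-alloc %llu, heap-size %llu\n",
               devices[i].index, devices[i].vendor, devices[i].name,
               (unsigned long long) devices[i].maxAlloc,
               (unsigned long long) devices[i].heapSize);
    }
    free(devices);
    return 0;
}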
File diff suppressed because it is too large
llama.cpp:

@@ -3468,6 +3468,8 @@ static size_t llama_get_device_count(const llama_model & model) {
     count = ggml_backend_sycl_get_device_count();
 #elif defined(GGML_USE_VULKAN)
     count = ggml_backend_vk_get_device_count();
+#elif defined(GGML_USE_KOMPUTE)
+    count = ggml_backend_kompute_get_device_count();
 #elif defined(GGML_USE_CANN)
     return ggml_backend_cann_get_device_count();
 #endif
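The new entry point can also be probed directly; a trivial sketch, again assuming a build compiled with GGML_USE_KOMPUTE:

// Hypothetical probe of the new device-count API; not from this commit.
#include <cstdio>
#include "ggml-kompute.h"

int main() {
    printf("Kompute reports %d device(s)\n",
           ggml_backend_kompute_get_device_count());
    return 0;
}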
@@ -3572,6 +3574,11 @@ static size_t llama_get_device_memory(const llama_model & model, int device) {
     size_t free;
     ggml_backend_vk_get_device_memory(local_device, &free, &total);
     return free;
+#elif defined(GGML_USE_KOMPUTE)
+    size_t total;
+    size_t free;
+    ggml_backend_kompute_get_device_memory(device, &free, &total);
+    return free;
 #elif defined(GGML_USE_CANN)
     size_t total;
     size_t free;
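Combining the two new entry points gives a per-device memory report mirroring what llama_get_device_memory now does internally. A sketch, assuming GGML_USE_KOMPUTE; sizes are in bytes:

// Hypothetical per-device memory probe; not from this commit.
#include <cstdio>
#include "ggml-kompute.h"

int main() {
    int n = ggml_backend_kompute_get_device_count();
    for (int i = 0; i < n; i++) {
        size_t free = 0, total = 0;
        ggml_backend_kompute_get_device_memory(i, &free, &total);
        printf("device %d: %zu / %zu bytes free\n", i, free, total);
    }
    return 0;
}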
@@ -18997,6 +19004,8 @@ size_t llama_max_devices(void) {
     return GGML_SYCL_MAX_DEVICES;
 #elif defined(GGML_USE_VULKAN)
     return GGML_VK_MAX_DEVICES;
+#elif defined(GGML_USE_KOMPUTE)
+    return GGML_KOMPUTE_MAX_DEVICES;
 #elif defined(GGML_USE_CANN)
     return GGML_CANN_MAX_DEVICES;
 #else
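With this hunk, Kompute builds report GGML_KOMPUTE_MAX_DEVICES (16) through the public API. Note that llama_max_devices() is the compile-time upper bound, not the number of GPUs actually present. A sketch:

// Hypothetical use of the public API; not from this commit.
#include <cstdio>
#include "llama.h"

int main() {
    printf("max devices supported by this build: %zu\n", llama_max_devices());
    return 0;
}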
@@ -19350,13 +19359,35 @@ struct llama_context * llama_new_context_with_model(
         }
 #elif defined(GGML_USE_KOMPUTE)
         if (model->n_gpu_layers > 0) {
-            auto * backend = ggml_backend_kompute_init(model->main_gpu);
-            if (backend == nullptr) {
-                LLAMA_LOG_ERROR("%s: failed to initialize Kompute backend\n", __func__);
+            if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
+                auto * backend = ggml_backend_kompute_init(model->main_gpu);
+                if (!backend) {
+                    LLAMA_LOG_ERROR("%s: failed to initialize Kompute backend\n", __func__);
+                    llama_free(ctx);
+                    return nullptr;
+                }
+                ctx->backends.push_back(backend);
+            } else if (model->split_mode == LLAMA_SPLIT_MODE_LAYER) {
+                size_t count = 0;
+                auto * devices = ggml_vk_available_devices(0, &count);
+                for (size_t i = 0; i < count; i++) {
+                    LLAMA_LOG_INFO("Kompute: Found device #%d, %s, %s, max-alloc %ld, heap-size %lu\n",
+                        devices[i].index, devices[i].vendor, devices[i].name,
+                        devices[i].maxAlloc, devices[i].heapSize);
+                    auto * backend = ggml_backend_kompute_init(devices[i].index);
+                    if (!backend) {
+                        LLAMA_LOG_ERROR("%s: failed to initialize Kompute backend\n", __func__);
+                        llama_free(ctx);
+                        return nullptr;
+                    }
+                    ctx->backends.push_back(backend);
+                }
+                free(devices);
+            } else {
+                LLAMA_LOG_ERROR("%s: Failed to init Kompute backend: split mode %d not supported\n", __func__, model->split_mode);
                 llama_free(ctx);
                 return nullptr;
             }
-            ctx->backends.push_back(backend);
         }
 #elif defined(GGML_USE_CANN)
         // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
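A caller opts into the new multi-GPU path through the existing split_mode knob: LLAMA_SPLIT_MODE_NONE keeps the single main_gpu backend, LLAMA_SPLIT_MODE_LAYER initializes one backend per device returned by ggml_vk_available_devices(), and LLAMA_SPLIT_MODE_ROW falls into the unsupported branch above. A minimal end-to-end sketch, assuming a Kompute build; the model path and n_gpu_layers value are placeholders:

// Hypothetical caller exercising the new path; not from this commit.
#include <cstdio>
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99;                     // offload all layers
    mparams.split_mode   = LLAMA_SPLIT_MODE_LAYER; // spread layers across GPUs

    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (!model) { fprintf(stderr, "load failed\n"); return 1; }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (!ctx) { fprintf(stderr, "context init failed\n"); return 1; }

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}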