kompute : adapt ggml-kompute API to be compatible with C

This commit is contained in:
Jared Van Bortel 2024-01-26 17:16:25 -05:00
parent 57cecad175
commit 4b0c96a9e2
3 changed files with 62 additions and 49 deletions

View file

@ -36,6 +36,8 @@
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <cassert> #include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
@ -139,7 +141,7 @@ static bool ggml_vk_checkPhysicalDeviceFeatures(vk::PhysicalDevice physicalDevic
return true; return true;
} }
static std::string ggml_vk_getVendorName(uint32_t vendorID) { static const char * ggml_vk_getVendorName(uint32_t vendorID) {
switch (vendorID) { switch (vendorID) {
case 0x10DE: case 0x10DE:
return "nvidia"; return "nvidia";
@ -152,7 +154,7 @@ static std::string ggml_vk_getVendorName(uint32_t vendorID) {
} }
} }
std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired) { static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t memoryRequired) {
std::vector<ggml_vk_device> results; std::vector<ggml_vk_device> results;
if (!komputeManager()->hasVulkan() || !komputeManager()->hasInstance()) if (!komputeManager()->hasVulkan() || !komputeManager()->hasInstance())
return results; return results;
@ -206,13 +208,16 @@ std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired) {
d.index = i; d.index = i;
d.type = properties.deviceType; d.type = properties.deviceType;
d.heapSize = heapSize; d.heapSize = heapSize;
d.name = properties.deviceName; d.vendor = strdup(ggml_vk_getVendorName(properties.vendorID));
d.subgroupSize = subgroupProperties.subgroupSize; d.subgroupSize = subgroupProperties.subgroupSize;
size_t n_idx = ++count_by_name[d.name];
std::string name(properties.deviceName);
size_t n_idx = ++count_by_name[name];
if (n_idx > 1) { if (n_idx > 1) {
d.name += " (" + std::to_string(n_idx) + ")"; name += " (" + std::to_string(n_idx) + ")";
} }
d.vendor = ggml_vk_getVendorName(properties.vendorID); d.name = strdup(name.c_str());
results.push_back(d); results.push_back(d);
} }
@ -232,6 +237,20 @@ std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired) {
return results; return results;
} }
// Public C-compatible API: returns the available devices as a malloc()'d C-style array.
//
// memoryRequired - forwarded unchanged to ggml_vk_available_devices_internal().
// count          - out-parameter, must not be null; receives the number of
//                  elements in the returned array (0 whenever nullptr is returned).
//
// Returns nullptr when no device is available or when the array cannot be
// allocated. On success the array is a shallow copy of the internal entries, so
// its `name` / `vendor` pointers are the strdup()'d strings created by the
// internal enumeration.
// NOTE(review): the caller presumably owns the result and releases it (and the
// strings) with free() - confirm against the callers.
ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
    auto devices = ggml_vk_available_devices_internal(memoryRequired);
    *count = devices.size();
    if (devices.empty()) {
        return nullptr;
    }

    const size_t nbytes = sizeof (ggml_vk_device) * devices.size();
    auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
    if (arr == nullptr) {
        // Allocation failed: nothing is handed to the caller, so release the
        // strdup()'d strings here and report an empty result.
        for (const auto & d : devices) {
            free(const_cast<char *>(d.name));
            free(const_cast<char *>(d.vendor));
        }
        *count = 0;
        return nullptr;
    }

    // The destination is the allocated buffer itself (`arr`), not the address
    // of the local pointer variable - copying to `&arr` would overwrite the
    // stack and leave the returned buffer uninitialized.
    memcpy(arr, devices.data(), nbytes);
    return arr;
}
static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) { static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
devices.erase( devices.erase(
std::remove_if(devices.begin(), devices.end(), std::remove_if(devices.begin(), devices.end(),
@ -252,32 +271,25 @@ static void ggml_vk_filterByName(std::vector<ggml_vk_device>& devices, const std
); );
} }
bool ggml_vk_init_device(size_t memoryRequired, const std::string &device) { static bool ggml_vk_init_device(size_t memoryRequired, const std::string & device) {
if (device.empty()) if (device.empty())
return false; return false;
std::vector<ggml_vk_device> devices = ggml_vk_available_devices(memoryRequired); auto devices = ggml_vk_available_devices_internal(memoryRequired);
if (device == "gpu") { if (device == "amd" || device == "nvidia" || device == "intel") {
if (devices.size() != 0)
return ggml_vk_init_device(devices.front());
} else if (device == "amd" || device == "nvidia" || device == "intel") {
ggml_vk_filterByVendor(devices, device); ggml_vk_filterByVendor(devices, device);
if (devices.size() != 0) } else if (device != "gpu") {
return ggml_vk_init_device(devices.front());
} else {
ggml_vk_filterByName(devices, device); ggml_vk_filterByName(devices, device);
if (devices.size() != 0)
return ggml_vk_init_device(devices.front());
} }
return ggml_vk_has_device(); return !devices.empty() && ggml_vk_init_device_idx(devices[0].index);
} }
bool ggml_vk_init_device(const ggml_vk_device &device) { bool ggml_vk_init_device(size_t memoryRequired, const char * device) {
return ggml_vk_init_device(device.index); return ggml_vk_init_device(memoryRequired, std::string(device));
} }
bool ggml_vk_init_device(int device) { bool ggml_vk_init_device_idx(int device) {
komputeManager()->initializeDevice(device, {}, komputeManager()->initializeDevice(device, {},
{"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage", {"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage",
"VK_KHR_16bit_storage", "VK_KHR_shader_non_semantic_info"}); "VK_KHR_16bit_storage", "VK_KHR_shader_non_semantic_info"});
@ -311,7 +323,7 @@ ggml_vk_device ggml_vk_current_device() {
if (!komputeManager()->hasDevice()) if (!komputeManager()->hasDevice())
return ggml_vk_device(); return ggml_vk_device();
std::vector<ggml_vk_device> devices = ggml_vk_available_devices(0); auto devices = ggml_vk_available_devices_internal(0);
ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName); ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName);
return devices.front(); return devices.front();
} }

View file

@ -3,38 +3,35 @@
#include "ggml.h" #include "ggml.h"
#include "ggml-backend.h" #include "ggml-backend.h"
#include <cstddef> #include <stdbool.h>
#include <string> #include <stddef.h>
#include <vector>
struct ggml_vk_device {
int index = 0;
int type = 0; // same as VkPhysicalDeviceType
size_t heapSize = 0;
std::string name;
std::string vendor;
int subgroupSize = 0;
};
std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);
bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);
bool ggml_vk_init_device(const ggml_vk_device &device);
bool ggml_vk_init_device(int device);
bool ggml_vk_free_device();
bool ggml_vk_has_vulkan();
bool ggml_vk_has_device();
bool ggml_vk_using_vulkan();
ggml_vk_device ggml_vk_current_device();
//
// backend API
// user-code should use only these functions
//
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
// Plain C description of one Vulkan physical device, as filled in by the
// device enumeration code in the Kompute backend source.
struct ggml_vk_device {
// position of the device in the enumeration loop; this is the value passed to
// ggml_vk_init_device_idx() when a device is selected
int index;
int type; // same as VkPhysicalDeviceType
// taken from the heap size computed during enumeration
// NOTE(review): presumably bytes of device memory - confirm
size_t heapSize;
// strdup()'d copy of the Vulkan device name; when several devices share a
// name, later ones get a " (N)" suffix
const char * name;
// strdup()'d vendor string derived from the Vulkan vendor ID (e.g. "nvidia")
const char * vendor;
// copied from the Vulkan subgroup properties of the device
int subgroupSize;
};
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
bool ggml_vk_init_device(size_t memoryRequired, const char * device);
bool ggml_vk_init_device_idx(int device);
bool ggml_vk_free_device(void);
bool ggml_vk_has_vulkan(void);
bool ggml_vk_has_device(void);
bool ggml_vk_using_vulkan(void);
struct ggml_vk_device ggml_vk_current_device(void);
//
// backend API
//
// forward declaration // forward declaration
typedef struct ggml_backend * ggml_backend_t; typedef struct ggml_backend * ggml_backend_t;

View file

@ -1,3 +1,7 @@
#include "llama.h" #include "llama.h"
#ifdef GGML_USE_KOMPUTE
#include "ggml-kompute.h"
#endif
int main(void) {} int main(void) {}