Fix queue selection for AMD RADV
This commit is contained in:
parent
ae7325fdff
commit
e35d28fec3
1 changed file with 13 additions and 9 deletions
|
@ -221,26 +221,26 @@ static void ggml_vk_dispatch_pipeline(vk_pipeline& pipeline, std::vector<vk_buff
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t ggml_vk_find_queue_family_index(std::vector<vk::QueueFamilyProperties>& queue_family_props, const vk::QueueFlags& required, const vk::QueueFlags& avoid, int32_t compute_index) {
|
static uint32_t ggml_vk_find_queue_family_index(std::vector<vk::QueueFamilyProperties>& queue_family_props, const vk::QueueFlags& required, const vk::QueueFlags& avoid, int32_t compute_index, int32_t min_num_queues) {
|
||||||
const uint32_t qfsize = queue_family_props.size();
|
const uint32_t qfsize = queue_family_props.size();
|
||||||
|
|
||||||
// Try with avoid preferences first
|
// Try with avoid preferences first
|
||||||
for (uint32_t i = 0; i < qfsize; i++) {
|
for (uint32_t i = 0; i < qfsize; i++) {
|
||||||
if ((compute_index < 0 || i != compute_index) && queue_family_props[i].queueFlags & required && !(queue_family_props[i].queueFlags & avoid)) {
|
if (queue_family_props[i].queueCount >= min_num_queues && (compute_index < 0 || i != compute_index) && queue_family_props[i].queueFlags & required && !(queue_family_props[i].queueFlags & avoid)) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to only required
|
// Fall back to only required
|
||||||
for (size_t i = 0; i < qfsize; i++) {
|
for (size_t i = 0; i < qfsize; i++) {
|
||||||
if ((compute_index < 0 || i != compute_index) && queue_family_props[i].queueFlags & required) {
|
if (queue_family_props[i].queueCount >= min_num_queues && (compute_index < 0 || i != compute_index) && queue_family_props[i].queueFlags & required) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to reusing compute queue
|
// Fall back to reusing compute queue
|
||||||
for (size_t i = 0; i < qfsize; i++) {
|
for (size_t i = 0; i < qfsize; i++) {
|
||||||
if (queue_family_props[i].queueFlags & required) {
|
if (queue_family_props[i].queueCount >= min_num_queues && queue_family_props[i].queueFlags & required) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -373,14 +373,18 @@ void ggml_vk_init(void) {
|
||||||
std::vector<vk::QueueFamilyProperties> queue_family_props = vk_physical_device.getQueueFamilyProperties();
|
std::vector<vk::QueueFamilyProperties> queue_family_props = vk_physical_device.getQueueFamilyProperties();
|
||||||
|
|
||||||
// Try to find a non-graphics compute queue and transfer-focused queues
|
// Try to find a non-graphics compute queue and transfer-focused queues
|
||||||
uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, vk::QueueFlagBits::eGraphics, -1);
|
uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, vk::QueueFlagBits::eGraphics, -1, 1);
|
||||||
uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | vk::QueueFlagBits::eGraphics | vk::QueueFlagBits::eVideoDecodeKHR | vk::QueueFlagBits::eProtected | vk::QueueFlagBits::eOpticalFlowNV, compute_queue_family_index);
|
uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | vk::QueueFlagBits::eGraphics | vk::QueueFlagBits::eVideoDecodeKHR | vk::QueueFlagBits::eProtected | vk::QueueFlagBits::eOpticalFlowNV, compute_queue_family_index, 2);
|
||||||
|
|
||||||
const float compute_queue_priority = 1.0f;
|
const float compute_queue_priority = 1.0f;
|
||||||
const float transfer_queue_priority[] = { 1.0f, 1.0f };
|
const float transfer_queue_priority[] = { 1.0f, 1.0f, 1.0f };
|
||||||
std::vector<vk::DeviceQueueCreateInfo> device_queue_create_infos;
|
std::vector<vk::DeviceQueueCreateInfo> device_queue_create_infos;
|
||||||
device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), compute_queue_family_index, 1, &compute_queue_priority});
|
if (compute_queue_family_index != transfer_queue_family_index) {
|
||||||
device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), transfer_queue_family_index, VK_TRANSFER_QUEUE_COUNT, transfer_queue_priority});
|
device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), compute_queue_family_index, 1, &compute_queue_priority});
|
||||||
|
device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), transfer_queue_family_index, VK_TRANSFER_QUEUE_COUNT, transfer_queue_priority});
|
||||||
|
} else {
|
||||||
|
device_queue_create_infos.push_back({vk::DeviceQueueCreateFlags(), transfer_queue_family_index, 1 + VK_TRANSFER_QUEUE_COUNT, transfer_queue_priority});
|
||||||
|
}
|
||||||
vk::DeviceCreateInfo device_create_info;
|
vk::DeviceCreateInfo device_create_info;
|
||||||
std::vector<const char *> device_extensions;
|
std::vector<const char *> device_extensions;
|
||||||
vk::PhysicalDeviceFeatures device_features = vk_physical_device.getFeatures();
|
vk::PhysicalDeviceFeatures device_features = vk_physical_device.getFeatures();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue