From 69554cee9e8de9305147aa4f84cf515f73036301 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Tue, 25 Jul 2023 07:01:02 +0200 Subject: [PATCH] Add fallback for devices only supporting one DescriptorSet per DescriptorPool --- ggml-vulkan.cpp | 67 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 9a9fb3a2e..957da19cf 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -40,6 +40,10 @@ #define VK_VENDOR_ID_INTEL 0x8086 #define VK_VENDOR_ID_NVIDIA 0x10de +#define VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN 0 +#define VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI 1 +#define VK_DEVICE_DESCRIPTOR_POOL_MODE_SINGLE 2 + struct vk_buffer { vk::Buffer buffer; vk::DeviceMemory device_memory; @@ -61,7 +65,7 @@ struct vk_subbuffer { struct vk_pipeline { std::string name; vk::DescriptorSetLayout dsl; - vk::DescriptorPool descriptor_pool; + std::vector descriptor_pools; std::vector descriptor_sets; uint32_t descriptor_set_index; vk::PipelineLayout layout; @@ -117,6 +121,7 @@ struct vk_device { uint32_t vendor_id; vk_queue compute_queue; vk_queue transfer_queues[VK_TRANSFER_QUEUE_COUNT]; + uint32_t descriptor_set_mode; }; typedef std::vector vk_sequence; @@ -189,9 +194,35 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& path, const std::s descriptor_set_layout_create_info.setPNext(&dslbfci); pipeline.dsl = vk_device.device.createDescriptorSetLayout(descriptor_set_layout_create_info); - vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); - vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, 128, descriptor_pool_size); - pipeline.descriptor_pool = vk_device.device.createDescriptorPool(descriptor_pool_create_info); + // Check if device supports multiple descriptors per pool + if (vk_device.descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN) { + const uint32_t alloc_count = 2; + + // Try allocating multiple sets 
from one pool + // This fails on AMD for some reason, so add a fallback to allocating one pool per set + vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); + vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, alloc_count, descriptor_pool_size); + vk::DescriptorPool pool = vk_device.device.createDescriptorPool(descriptor_pool_create_info); + + std::vector layouts(alloc_count); + for (uint32_t i = 0; i < alloc_count; i++) { + layouts[i] = pipeline.dsl; + } + try { + vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pool, alloc_count, layouts.data()); + std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); + } catch(vk::OutOfPoolMemoryError const&) { + vk_device.descriptor_set_mode = VK_DEVICE_DESCRIPTOR_POOL_MODE_SINGLE; + } + + vk_device.device.destroyDescriptorPool(pool); + } + + if (vk_device.descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI) { + vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); + vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, 128, descriptor_pool_size); + pipeline.descriptor_pools.push_back(vk_device.device.createDescriptorPool(descriptor_pool_create_info)); + } pipeline.descriptor_set_index = 0; @@ -237,13 +268,27 @@ static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uin return; } - std::vector layouts(n); - for (uint32_t i = 0; i < n; i++) { - layouts[i] = pipeline.dsl; + if (vk_device.descriptor_set_mode == VK_DEVICE_DESCRIPTOR_POOL_MODE_MULTI) { + const uint32_t alloc_count = n - pipeline.descriptor_sets.size(); + + std::vector layouts(alloc_count); + for (uint32_t i = 0; i < alloc_count; i++) { + layouts[i] = pipeline.dsl; + } + vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pipeline.descriptor_pools[0], alloc_count, layouts.data()); + std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); + 
pipeline.descriptor_sets.insert(pipeline.descriptor_sets.end(), sets.begin(), sets.end()); + } else { + for (uint32_t i = pipeline.descriptor_sets.size(); i < n; i++) { + vk::DescriptorPoolSize descriptor_pool_size(vk::DescriptorType::eStorageBuffer, pipeline.parameter_count); + vk::DescriptorPoolCreateInfo descriptor_pool_create_info({}, 1, descriptor_pool_size); + pipeline.descriptor_pools.push_back(vk_device.device.createDescriptorPool(descriptor_pool_create_info)); + + vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pipeline.descriptor_pools[i], 1, &pipeline.dsl); + std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); + pipeline.descriptor_sets.push_back(sets[0]); + } } - vk::DescriptorSetAllocateInfo descriptor_set_alloc_info(pipeline.descriptor_pool, n - pipeline.descriptor_sets.size(), layouts.data()); - std::vector sets = vk_device.device.allocateDescriptorSets(descriptor_set_alloc_info); - pipeline.descriptor_sets.insert(pipeline.descriptor_sets.end(), sets.begin(), sets.end()); } static void ggml_vk_pipeline_cleanup(vk_pipeline& pipeline) { @@ -667,6 +712,8 @@ void ggml_vk_init(void) { device_create_info.setPNext(&device_features2); vk_device.device = vk_device.physical_device.createDevice(device_create_info); + vk_device.descriptor_set_mode = VK_DEVICE_DESCRIPTOR_POOL_MODE_UNKNOWN; + vk_pinned_workspace = nullptr; vk_pinned_workspace_size = 0;