Use subgroup min and max sizes to check for GCN (requires https://github.com/ggerganov/llama.cpp/pull/10721)

This commit is contained in:
Eve 2024-12-08 17:26:56 -05:00
parent c7bc42cea0
commit 9af9e80163

View file

@@ -1812,7 +1812,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
// AMD GCN graphics cards perform best when the number of rows per shader is doubled
uint32_t rm = 1;
-if ((device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_size == 64))
+if ((device->subgroup_size_control) && (device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_min_size == 64) && (device->subgroup_max_size == 64))
rm = 2;
// computing additional rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.