Also double the number of rows for Intel GPUs

This commit is contained in:
0cc4m 2024-12-13 08:13:15 +01:00 committed by GitHub
parent 1aa26d783a
commit 20b47d4d94
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1846,9 +1846,9 @@ static void ggml_vk_load_shaders(vk_device& device) {
 // mul mat vec
-// AMD GCN graphics cards perform best when the number of rows per shader is doubled
+// AMD GCN and Intel graphics cards perform best when the number of rows per shader is doubled
 uint32_t rm = 1;
-if ((device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_min_size == 64) && (device->subgroup_max_size == 64))
+if ((device->vendor_id == VK_VENDOR_ID_AMD && device->subgroup_min_size == 64 && device->subgroup_max_size == 64) || device->vendor_id == VK_VENDOR_ID_INTEL)
 rm = 2;
 // computing additional rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.