Add environment variable GGML_VK_FORCE_MAX_ALLOCATION_SIZE to limit max buffer size

Rename GGML_VULKAN_DISABLE_F16 to GGML_VK_DISABLE_F16 for consistency
This commit is contained in:
0cc4m 2024-03-02 18:53:35 +01:00
parent f4ec9a06ea
commit a6042049be

View file

@ -1225,8 +1225,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
} }
} }
const char* GGML_VULKAN_DISABLE_F16 = getenv("GGML_VULKAN_DISABLE_F16"); const char* GGML_VK_DISABLE_F16 = getenv("GGML_VK_DISABLE_F16");
bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != nullptr; bool force_disable_f16 = GGML_VK_DISABLE_F16 != nullptr;
bool fp16 = !force_disable_f16 && fp16_storage && fp16_compute; bool fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
@ -1382,7 +1382,11 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
ctx->device->physical_device.getProperties2(&props2); ctx->device->physical_device.getProperties2(&props2);
ctx->device->properties = props2.properties; ctx->device->properties = props2.properties;
if (maintenance4_support) { const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
if (GGML_VK_FORCE_MAX_ALLOCATION_SIZE != nullptr) {
ctx->device->max_memory_allocation_size = std::stoi(GGML_VK_FORCE_MAX_ALLOCATION_SIZE);
} else if (maintenance4_support) {
ctx->device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize); ctx->device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize);
} else { } else {
ctx->device->max_memory_allocation_size = props3.maxMemoryAllocationSize; ctx->device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
@ -1402,29 +1406,9 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
fp16_compute = true; fp16_compute = true;
} }
} }
ctx->device->physical_device.getProperties2(&props2);
ctx->device->properties = props2.properties;
if (maintenance4_support) { const char* GGML_VK_DISABLE_F16 = getenv("GGML_VK_DISABLE_F16");
ctx->device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize); const bool force_disable_f16 = GGML_VK_DISABLE_F16 != nullptr;
} else {
ctx->device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
}
ctx->device->vendor_id = ctx->device->properties.vendorID;
ctx->device->subgroup_size = subgroup_props.subgroupSize;
ctx->device->uma = ctx->device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
for (auto properties : ext_props) {
if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
fp16_storage = true;
} else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) {
fp16_compute = true;
}
}
const char* GGML_VULKAN_DISABLE_F16 = getenv("GGML_VULKAN_DISABLE_F16");
bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != nullptr;
ctx->device->fp16 = !force_disable_f16 && fp16_storage && fp16_compute; ctx->device->fp16 = !force_disable_f16 && fp16_storage && fp16_compute;