Add environment variable GGML_VK_FORCE_MAX_ALLOCATION_SIZE to limit max buffer size
Rename GGML_VULKAN_DISABLE_F16 to GGML_VK_DISABLE_F16 for consistency
This commit is contained in:
parent
f4ec9a06ea
commit
a6042049be
1 changed file with 9 additions and 25 deletions
|
@ -1225,8 +1225,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* GGML_VULKAN_DISABLE_F16 = getenv("GGML_VULKAN_DISABLE_F16");
|
const char* GGML_VK_DISABLE_F16 = getenv("GGML_VK_DISABLE_F16");
|
||||||
bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != nullptr;
|
bool force_disable_f16 = GGML_VK_DISABLE_F16 != nullptr;
|
||||||
|
|
||||||
bool fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
|
bool fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
|
||||||
|
|
||||||
|
@ -1382,7 +1382,11 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
|
||||||
ctx->device->physical_device.getProperties2(&props2);
|
ctx->device->physical_device.getProperties2(&props2);
|
||||||
ctx->device->properties = props2.properties;
|
ctx->device->properties = props2.properties;
|
||||||
|
|
||||||
if (maintenance4_support) {
|
const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
|
||||||
|
|
||||||
|
if (GGML_VK_FORCE_MAX_ALLOCATION_SIZE != nullptr) {
|
||||||
|
ctx->device->max_memory_allocation_size = std::stoi(GGML_VK_FORCE_MAX_ALLOCATION_SIZE);
|
||||||
|
} else if (maintenance4_support) {
|
||||||
ctx->device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize);
|
ctx->device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize);
|
||||||
} else {
|
} else {
|
||||||
ctx->device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
|
ctx->device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
|
||||||
|
@ -1402,29 +1406,9 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
|
||||||
fp16_compute = true;
|
fp16_compute = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ctx->device->physical_device.getProperties2(&props2);
|
|
||||||
ctx->device->properties = props2.properties;
|
|
||||||
|
|
||||||
if (maintenance4_support) {
|
const char* GGML_VK_DISABLE_F16 = getenv("GGML_VK_DISABLE_F16");
|
||||||
ctx->device->max_memory_allocation_size = std::min(props3.maxMemoryAllocationSize, props4.maxBufferSize);
|
const bool force_disable_f16 = GGML_VK_DISABLE_F16 != nullptr;
|
||||||
} else {
|
|
||||||
ctx->device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx->device->vendor_id = ctx->device->properties.vendorID;
|
|
||||||
ctx->device->subgroup_size = subgroup_props.subgroupSize;
|
|
||||||
ctx->device->uma = ctx->device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
|
|
||||||
|
|
||||||
for (auto properties : ext_props) {
|
|
||||||
if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
|
|
||||||
fp16_storage = true;
|
|
||||||
} else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) {
|
|
||||||
fp16_compute = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* GGML_VULKAN_DISABLE_F16 = getenv("GGML_VULKAN_DISABLE_F16");
|
|
||||||
bool force_disable_f16 = GGML_VULKAN_DISABLE_F16 != nullptr;
|
|
||||||
|
|
||||||
ctx->device->fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
|
ctx->device->fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue