Merge branch 'ggerganov:master' into master
This commit is contained in:
commit
094caea359
3 changed files with 21 additions and 6 deletions
|
@ -69,7 +69,7 @@ In this section, we cover the most commonly used options for running the `llama-
|
||||||
- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference.
|
- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference.
|
||||||
- `-mli, --multiline-input`: Allows you to write or paste multiple lines without ending each in '\'
|
- `-mli, --multiline-input`: Allows you to write or paste multiple lines without ending each in '\'
|
||||||
- `-t N, --threads N`: Set the number of threads to use during generation. For optimal performance, it is recommended to set this value to the number of physical CPU cores your system has.
|
- `-t N, --threads N`: Set the number of threads to use during generation. For optimal performance, it is recommended to set this value to the number of physical CPU cores your system has.
|
||||||
- - `-ngl N, --n-gpu-layers N`: When compiled with GPU support, this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
|
- `-ngl N, --n-gpu-layers N`: When compiled with GPU support, this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
|
||||||
|
|
||||||
## Input Prompts
|
## Input Prompts
|
||||||
|
|
||||||
|
|
|
@ -1070,10 +1070,25 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
|
||||||
try {
|
try {
|
||||||
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
|
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
|
||||||
} catch (const vk::SystemError& e) {
|
} catch (const vk::SystemError& e) {
|
||||||
// Out of Host/Device memory, clean up buffer
|
if (buf->memory_property_flags != fallback_flags) {
|
||||||
device->device.destroyBuffer(buf->buffer);
|
// Try again with fallback flags
|
||||||
buf->size = 0;
|
memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
|
||||||
throw e;
|
buf->memory_property_flags = fallback_flags;
|
||||||
|
|
||||||
|
try {
|
||||||
|
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
|
||||||
|
}
|
||||||
|
catch (const vk::SystemError& e) {
|
||||||
|
device->device.destroyBuffer(buf->buffer);
|
||||||
|
buf->size = 0;
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Out of Host/Device memory, clean up buffer
|
||||||
|
device->device.destroyBuffer(buf->buffer);
|
||||||
|
buf->size = 0;
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
buf->ptr = nullptr;
|
buf->ptr = nullptr;
|
||||||
|
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
0d7ecbbe536dc84240f646e0ec0a712251377f34
|
564f42082f858f9674b2a2e06e9e779d9ed2c754
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue