llama : pre-allocate input tensors in a separate buffer (#5100)

Author: slaren, 2024-01-24 12:48:14 +01:00 (committed by GitHub)
parent 26d607608d
commit 1387ea2117
2 changed files with 167 additions and 182 deletions

@@ -109,8 +109,8 @@ void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) {
         if (block->size >= size) {
             best_fit_block = alloc->n_free_blocks - 1;
         } else {
-            fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
-                    __func__, size, max_avail);
+            fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, largest block available %zu)\n",
+                    __func__, tensor->name, size, max_avail);
             GGML_ASSERT(!"not enough space in the buffer");
             return;
         }
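
For context, here is a minimal, self-contained sketch of the kind of best-fit free-block search this hunk's error message sits in, with the improved report that names the tensor being allocated. It is an illustration only, not the ggml-alloc implementation: the `free_block` struct, the `best_fit` helper, and the example tensor name "inp_tokens" are all assumed for the example.

/* Sketch (assumed structure, not ggml-alloc itself): scan the free blocks,
 * remember the smallest block that fits, and if nothing fits, report the
 * tensor name together with the requested size and the largest block seen,
 * mirroring the error message changed in the hunk above. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct free_block { size_t size; };

/* Returns the index of the chosen free block, or -1 if the request cannot be
 * satisfied; `name` stands in for tensor->name in the real allocator. */
static int best_fit(const struct free_block *blocks, int n_free_blocks,
                    size_t size, const char *name) {
    int    best_fit_block = -1;
    size_t best_fit_size  = SIZE_MAX;
    size_t max_avail      = 0;

    for (int i = 0; i < n_free_blocks; i++) {
        if (blocks[i].size > max_avail) {
            max_avail = blocks[i].size;
        }
        if (blocks[i].size >= size && blocks[i].size < best_fit_size) {
            best_fit_block = i;
            best_fit_size  = blocks[i].size;
        }
    }

    if (best_fit_block == -1) {
        fprintf(stderr, "%s: not enough space in the buffer to allocate %s "
                        "(needed %zu, largest block available %zu)\n",
                __func__, name, size, max_avail);
        assert(!"not enough space in the buffer");
        return -1;
    }
    return best_fit_block;
}

int main(void) {
    struct free_block blocks[] = { { 64 }, { 256 }, { 128 } };
    int idx = best_fit(blocks, 3, 100, "inp_tokens");
    printf("chose block %d\n", idx); /* picks index 2, the 128-byte block */
    return 0;
}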