llama : pre-allocate input tensors in a separate buffer (#5100)
This commit is contained in:
parent
26d607608d
commit
1387ea2117
2 changed files with 167 additions and 182 deletions
|
@ -109,8 +109,8 @@ void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) {
|
|||
if (block->size >= size) {
|
||||
best_fit_block = alloc->n_free_blocks - 1;
|
||||
} else {
|
||||
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
|
||||
__func__, size, max_avail);
|
||||
fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, largest block available %zu)\n",
|
||||
__func__, tensor->name, size, max_avail);
|
||||
GGML_ASSERT(!"not enough space in the buffer");
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue