increase measured alloc size by tensor_alignment

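ggml_allocr_reset can reduce the usable size of the given buffer by up to tensor_alignment-1 bytes (presumably to align the start of the buffer), so a buffer sized exactly to the measured maximum may be too small; padding the measured size by tensor_alignment covers this loss.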
This commit is contained in:
xaedes 2023-09-02 15:59:14 +02:00
parent c32ad44f84
commit 6ee12b158b
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

View file

@ -621,7 +621,7 @@ void init_lora(const struct my_llama_model * model, struct my_llama_lora * lora)
}
// allocate data
lora->data.resize(ggml_allocr_max_size(alloc));
lora->data.resize(ggml_allocr_max_size(alloc) + tensor_alignment);
ggml_allocr_free(alloc);
alloc = ggml_allocr_new(lora->data.data(), lora->data.size(), tensor_alignment);
ggml_allocr_alloc(alloc, lora->tok_embeddings_a);
@ -2547,7 +2547,7 @@ int main(int argc, char ** argv) {
alloc = ggml_allocr_new_measure(tensor_alignment);
ggml_allocr_alloc(alloc, tokens_input);
ggml_allocr_alloc(alloc, target_probs);
size_t max_input_size = ggml_allocr_max_size(alloc);
size_t max_input_size = ggml_allocr_max_size(alloc) + tensor_alignment;
ggml_allocr_free(alloc);
printf("%s: max_input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f));
@ -2594,7 +2594,7 @@ int main(int argc, char ** argv) {
params.use_flash,
params.use_checkpointing
);
size_t max_compute_size = ggml_allocr_max_size(alloc);
size_t max_compute_size = ggml_allocr_max_size(alloc) + tensor_alignment;
ggml_allocr_free(alloc);
printf("%s: max_compute_size = %zu bytes (%.1f MB)\n", __func__, max_compute_size, (float) max_compute_size / (1024.0f*1024.0f));