train : fix context size calculations

2023-11-03 14:40:45 +02:00 · 2023-11-03 14:40:45 +02:00 · b1592ea054
commit b1592ea054
parent dc22db734b
2 changed files with 6 additions and 10 deletions
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -1742,11 +1742,9 @@ int main(int argc, char ** argv) {
    ggml_allocr_free(alloc);

    // context for compute tensors without their data
-    size_t estimated_compute_size_wo_data = (
-        ggml_tensor_overhead()*LLAMA_TRAIN_MAX_NODES*2
-      + (GGML_OBJECT_SIZE+ggml_graph_overhead())*(
-            params.common.use_checkpointing ? 3 : 2
-        )
+    const size_t estimated_compute_size_wo_data = (
+            2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() +
+            (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
    );
    struct ggml_init_params ctx_compute_params = {
        estimated_compute_size_wo_data, // mem_size
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1109,11 +1109,9 @@ int main(int argc, char ** argv) {
    ggml_allocr_free(alloc);

    // context for compute tensors without their data
-    size_t estimated_compute_size_wo_data = (
-        ggml_tensor_overhead()*LLAMA_TRAIN_MAX_NODES*2
-      + (GGML_OBJECT_SIZE+ggml_graph_overhead())*(
-            params.common.use_checkpointing ? 3 : 2
-        )
+    const size_t estimated_compute_size_wo_data = (
+            2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() +
+            (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
    );
    struct ggml_init_params ctx_compute_params = {
        estimated_compute_size_wo_data, // mem_size