From 49d6daa11ecd4d9f237be53420c456a476b7c073 Mon Sep 17 00:00:00 2001
From: xaedes
Date: Sun, 7 May 2023 19:46:05 +0200
Subject: [PATCH] vastly improve training results

instead of logit targets 0 and 1 use -1 and +1.
---
 examples/baby-llama/baby-llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 0fbb01d5d..e5c548a05 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1058,7 +1058,8 @@ void get_example_targets(int example_id, struct ggml_tensor * tokens_input, stru
     int n_tokens = tokens_input->ne[0];
     int n_vocab = targets->ne[0];
     float randomness = 0.0f;
-    ggml_set_zero(targets);
+    // ggml_set_zero(targets);
+    ggml_set_f32(targets, -1.0f);
     ggml_set_i32_1d(tokens_input, 0, 0);
     for (int i=1; i