From dfa067631c6f7e5f5c153794150d47bea4f5e439 Mon Sep 17 00:00:00 2001
From: Joan Martinez
Date: Wed, 24 Apr 2024 10:14:02 +0200
Subject: [PATCH] feat: example comments in embedding

---
 examples/embedding/embedding.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 6a93147d7..fe357c44b 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -49,6 +49,12 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * output
         }
 
         float * out = output + batch.seq_id[i][0] * n_embd;
+        // TODO: I would also add a parameter here to enable normalization or not.
+        /*fprintf(stdout, "unnormalized_embedding:");
+        for (int hh = 0; hh < n_embd; hh++) {
+            fprintf(stdout, "%9.6f ", embd[hh]);
+        }
+        fprintf(stdout, "\n");*/
         llama_embd_normalize(embd, out, n_embd);
     }
 }
@@ -124,6 +130,8 @@ int main(int argc, char ** argv) {
     }
 
     // add SEP if not present
+    // JoanFM: I propose removing this line so that users can make sure that their model is properly configured to tokenize as expected.
+    // We could also add a parameter, but I think that adding parameters specific to the examples can easily become messy and unmaintainable.
     for (auto & inp : inputs) {
         if (inp.empty() || inp.back() != llama_token_sep(model)) {
             inp.push_back(llama_token_sep(model));
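
Review note: below is a minimal sketch of the normalization toggle proposed
in the TODO above. The store_embedding helper and its normalize flag are
hypothetical names introduced here for illustration; llama_embd_normalize
is the existing helper from common/common.h.

    // Hypothetical helper (illustration only, not part of the patch):
    // writes one sequence embedding to out, normalizing only on request.
    static void store_embedding(const float * embd, float * out, int n_embd, bool normalize) {
        if (normalize) {
            llama_embd_normalize(embd, out, n_embd); // L2-normalize into out
        } else {
            for (int j = 0; j < n_embd; j++) {
                out[j] = embd[j]; // copy raw (unnormalized) values
            }
        }
    }

batch_decode could then call store_embedding(embd, out, n_embd, normalize)
in place of the direct llama_embd_normalize call, with the flag plumbed in
from a (hypothetical) command-line parameter.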
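
Review note: the second comment proposes dropping the automatic SEP append.
A sketch of that alternative, illustrative only and not part of the patch:
warn instead of silently appending, so a misconfigured tokenizer is
surfaced to the user rather than papered over.

    // check for SEP instead of appending it (sketch, not part of the patch)
    for (auto & inp : inputs) {
        if (inp.empty() || inp.back() != llama_token_sep(model)) {
            fprintf(stderr, "warning: input does not end with SEP; "
                    "check the model's tokenizer configuration\n");
        }
    }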