feat: example comments in embedding

2024-04-24 10:14:02 +02:00 · 2024-04-24 10:14:02 +02:00 · dfa067631c
commit dfa067631c
parent dd060a2a4e
1 changed files with 8 additions and 0 deletions
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -49,6 +49,12 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
        }

        float * out = output + batch.seq_id[i][0] * n_embd;
+        // TODO: consider adding a parameter here to enable or disable normalization.
+        /*fprintf(stdout, "unnormalized_embedding:");
+        for (int hh = 0; hh < n_embd; hh++) {
+            fprintf(stdout, "%9.6f ", embd[hh]);
+        }
+        fprintf(stdout, "\n");*/
        llama_embd_normalize(embd, out, n_embd);
    }
 }
@@ -124,6 +130,8 @@ int main(int argc, char ** argv) {
    }

    // add SEP if not present
+    // JoanFM: I propose removing this loop so that users can make sure that their model is properly configured to tokenize as expected.
+    // We could also add a parameter, but I think that adding parameters specific to the examples can easily become messy and unmaintainable.
    for (auto & inp : inputs) {
        if (inp.empty() || inp.back() != llama_token_sep(model)) {
            inp.push_back(llama_token_sep(model));