llama : tokenizer fixes (#2549)

* Merge tokenizer fixes into the gguf branch.
* Add test vocabularies
2023-08-14 18:30:28 +02:00 · 2023-08-14 18:30:28 +02:00 · ec1b100720
commit ec1b100720
parent 8af3a99ff1
17 changed files with 612 additions and 147 deletions
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -62,7 +62,7 @@ int main(int argc, char ** argv) {
    fprintf(stderr, "\n\n");

    for (auto id : tokens_list) {
-        fprintf(stderr, "%s", llama_token_to_str(ctx, id));
+        fprintf(stderr, "%s", llama_token_to_str(ctx, id).c_str());
    }

    fflush(stderr);
@@ -109,7 +109,7 @@ int main(int argc, char ** argv) {
        }

        // print the new token :
-        printf("%s", llama_token_to_str(ctx, new_token_id));
+        printf("%s", llama_token_to_str(ctx, new_token_id).c_str());
        fflush(stdout);

        // push this new token for next evaluation