llama : tokenizer fixes (#2549)

* Merge tokenizer fixes into the gguf branch.

* Add test vocabularies
This commit is contained in:
goerch 2023-08-14 18:30:28 +02:00 committed by GitHub
parent 8af3a99ff1
commit ec1b100720
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 612 additions and 147 deletions

View file

@ -62,7 +62,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "\n\n");
for (auto id : tokens_list) {
fprintf(stderr, "%s", llama_token_to_str(ctx, id));
fprintf(stderr, "%s", llama_token_to_str(ctx, id).c_str());
}
fflush(stderr);
@ -109,7 +109,7 @@ int main(int argc, char ** argv) {
}
// print the new token :
printf("%s", llama_token_to_str(ctx, new_token_id));
printf("%s", llama_token_to_str(ctx, new_token_id).c_str());
fflush(stdout);
// push this new token for next evaluation