unicode : always use std::wregex

This commit is contained in:
Georgi Gerganov 2024-04-27 17:11:34 +03:00
parent 2affd0b221
commit ce5485aee0
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 6 additions and 35 deletions

View file

@@ -130,16 +130,14 @@ int main(int argc, char **argv) {
llama_detokenize_bpe(ctx, test_kv.second).c_str());
fprintf(stderr, "%s : expected tokens: ", __func__);
for (const auto & t : test_kv.second) {
fprintf(stderr, "%6d, ", t);
fprintf(stderr, "%6d '%s', ", t, llama_token_to_piece(ctx, t).c_str());
}
fprintf(stderr, "\n");
fprintf(stderr, "%s : got tokens: ", __func__);
for (const auto & t : res) {
fprintf(stderr, "%6d, ", t);
fprintf(stderr, "%6d '%s', ", t, llama_token_to_piece(ctx, t).c_str());
}
fprintf(stderr, "\n");
success = false;
}
}