From 03dbcc89f63d140a1248883c241335e8d729542f Mon Sep 17 00:00:00 2001 From: jaime-m-p <> Date: Thu, 20 Jun 2024 19:20:37 +0200 Subject: [PATCH] minor: confusing hexadecimal codepoint --- tests/test-tokenizer-1-bpe.cpp | 2 +- tests/test-tokenizer-1-spm.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp index e464b3358..9c748a9fb 100644 --- a/tests/test-tokenizer-1-bpe.cpp +++ b/tests/test-tokenizer-1-bpe.cpp @@ -125,7 +125,7 @@ int main(int argc, char **argv) { std::vector<llama_token> tokens = llama_tokenize(ctx, str, false); std::string check = llama_detokenize(ctx, tokens); if (cp != 9601 && str != check) { - fprintf(stderr, "error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n", + fprintf(stderr, "error: codepoint 0x%x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n", cp, check.c_str(), check.length(), str.c_str(), str.length()); std::exit(3); } diff --git a/tests/test-tokenizer-1-spm.cpp b/tests/test-tokenizer-1-spm.cpp index e82da302c..cb37b4880 100644 --- a/tests/test-tokenizer-1-spm.cpp +++ b/tests/test-tokenizer-1-spm.cpp @@ -89,7 +89,7 @@ int main(int argc, char ** argv) { std::vector<llama_token> tokens = llama_tokenize(ctx, str, false, true); std::string check = llama_detokenize(ctx, tokens); if (cp != 9601 && str != check) { - fprintf(stderr, "error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n", + fprintf(stderr, "error: codepoint 0x%x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n", cp, check.c_str(), check.length(), str.c_str(), str.length()); std::exit(3); }