test-tokenizer-0.cpp : spm - add whitespace in front of prompt
This commit is contained in:
parent
43f7c16ad0
commit
c50b1ae6b8
1 changed file with 2 additions and 1 deletion
|
@@ -100,7 +100,8 @@ int main(int argc, char **argv) {
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
for (const auto & test_kv : k_tests()) {
|
for (const auto & test_kv : k_tests()) {
|
||||||
std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, true);
|
// Add a space in front of the first character to match OG llama tokenizer behavior
|
||||||
|
std::vector<llama_token> res = llama_tokenize(ctx, " " + test_kv.first, true);
|
||||||
fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
|
fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
|
||||||
__func__, test_kv.first.c_str(), unescape_whitespace(ctx, res).c_str());
|
__func__, test_kv.first.c_str(), unescape_whitespace(ctx, res).c_str());
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue