Fix spm whitespaces (#2806)
* llama.cpp : fix spm whitespace escaping + clean up
* main.cpp : spm - add whitespace in front of prompt
* test-tokenizer-0.cpp : spm - add whitespace in front of prompt
This commit is contained in:
parent
bae5c5f679
commit
2ba83c8685
3 changed files with 27 additions and 41 deletions
|
@@ -100,7 +100,8 @@ int main(int argc, char **argv) {
|
|||
bool success = true;
|
||||
|
||||
for (const auto & test_kv : k_tests()) {
|
||||
std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, true);
|
||||
// Add a space in front of the first character to match OG llama tokenizer behavior
|
||||
std::vector<llama_token> res = llama_tokenize(ctx, " " + test_kv.first, true);
|
||||
fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
|
||||
__func__, test_kv.first.c_str(), unescape_whitespace(ctx, res).c_str());
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue