Fixing the last deviations from sentencepiece indicated by test-tokenizer-1 (#3170)
* Fix for #2721
* Reenable tokenizer test for LLaMa
* Add `console.cpp` dependency
* Fix dependency to `common`
* Fixing wrong fix.
* Make console usage platform specific. Work on compiler warnings.
* Adapting makefile
* Remove trailing whitespace
* Adapting the other parts of the makefile
* Fix typo.
* Fixing the last deviations from sentencepiece indicated by test-tokenizer-1
* Simplify logic
* Add missing change...
* Fix ugly compiler warning
* llama_tokenize should accept strings containing NUL now
* Adding huichen's test case
parent e6616cf0db
commit b08e75baea
6 changed files with 17 additions and 14 deletions
@@ -965,10 +965,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     buf[size] = '\0';
 
-    int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
+    int n_tokens = llama_tokenize(lctx, buf.data(), buf.size(), out.data(), out.size(), false);
     if (n_tokens < 0) {
         out.resize(-n_tokens);
-        n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
+        n_tokens = llama_tokenize(lctx, buf.data(), buf.size(), out.data(), out.size(), false);
     }
     GGML_ASSERT(n_tokens >= 0);
     out.resize(n_tokens);
 
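For context, here is a minimal caller-side sketch of the two-pass pattern the diff establishes, assuming the post-change signature llama_tokenize(ctx, text, text_len, tokens, n_max_tokens, add_bos). The helper name tokenize_all is hypothetical and not part of the commit; a negative return value reports the required token capacity, and passing the length explicitly is what lets inputs contain embedded NUL bytes.

#include <cassert>
#include <string>
#include <vector>
#include "llama.h"

// Hypothetical helper (not from the commit): tokenize a whole string,
// growing the output buffer on demand.
static std::vector<llama_token> tokenize_all(struct llama_context * ctx,
                                             const std::string & text,
                                             bool add_bos) {
    // Upper-bound guess: one token per byte, plus room for a BOS token.
    std::vector<llama_token> tokens(text.size() + (add_bos ? 1 : 0));
    // Passing text.size() explicitly means embedded NUL bytes are tokenized too.
    int n = llama_tokenize(ctx, text.data(), (int) text.size(),
                           tokens.data(), (int) tokens.size(), add_bos);
    if (n < 0) {
        // A negative result is the required capacity; resize and retry.
        tokens.resize((size_t) -n);
        n = llama_tokenize(ctx, text.data(), (int) text.size(),
                           tokens.data(), (int) tokens.size(), add_bos);
    }
    assert(n >= 0);
    tokens.resize((size_t) n);
    return tokens;
}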