bug fixes in tokenize_file
This commit is contained in:
parent 5d94997a09
commit 76d2794e11
1 changed file with 4 additions and 1 deletion
@@ -1319,6 +1319,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
 
     std::vector<char> buf;
     buf.resize(size+1);
+    out.resize(size+1);
 
     if (std::fread(buf.data(), size, 1, fp) != 1) {
         throw std::runtime_error(std::string("unexpectedly reached end of file"));
@@ -1332,8 +1333,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
     if (n_tokens < 0) {
         out.resize(-n_tokens);
-        llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
+        n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
     }
+    GGML_ASSERT(n_tokens >= 0);
+    out.resize(n_tokens);
 
     bool verify = false;
     if (verify) {
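For context, here is a minimal self-contained sketch of the two-pass tokenization idiom this commit repairs, assuming the llama_tokenize signature used at this revision (a negative return value reports the required buffer size). The wrapper name tokenize_text is hypothetical and stands in for the relevant part of tokenize_file.

#include <cstddef>
#include <vector>
#include "ggml.h"   // GGML_ASSERT
#include "llama.h"  // llama_context, llama_token, llama_tokenize

// Hypothetical wrapper around the pattern used in tokenize_file.
static std::vector<llama_token> tokenize_text(struct llama_context * lctx, const char * text, size_t size) {
    // Generous first guess: at most one token per byte of input.
    std::vector<llama_token> out(size + 1);

    // First pass: on failure the return value is the negative of the
    // number of tokens actually required.
    int n_tokens = llama_tokenize(lctx, text, out.data(), out.size(), false);
    if (n_tokens < 0) {
        out.resize(-n_tokens);
        // The bug this commit fixes: the second call's return value must be
        // captured, otherwise n_tokens keeps its negative first-pass value.
        n_tokens = llama_tokenize(lctx, text, out.data(), out.size(), false);
    }
    GGML_ASSERT(n_tokens >= 0);

    // Shrink to the number of tokens actually produced.
    out.resize(n_tokens);
    return out;
}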