bug fixes in tokenize_file

This commit is contained in:
xaedes 2023-08-28 01:47:31 +02:00
parent 5d94997a09
commit 76d2794e11
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

View file

@@ -1319,6 +1319,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     std::vector<char> buf;
     buf.resize(size+1);
+    out.resize(size+1);
     if (std::fread(buf.data(), size, 1, fp) != 1) {
         throw std::runtime_error(std::string("unexpectedly reached end of file"));
@@ -1332,8 +1333,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
     if (n_tokens < 0) {
         out.resize(-n_tokens);
-        llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
+        n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
     }
+    GGML_ASSERT(n_tokens >= 0);
+    out.resize(n_tokens);
     bool verify = false;
     if (verify) {