llava : fix tokenization to not add bos between image embeddings and user prompt (#3645)

* llava : fix tokenization to not add bos after system prompt

* set seed

---------

Co-authored-by: M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
This commit is contained in:
Georgi Gerganov 2023-10-16 23:58:00 +03:00 committed by GitHub
parent 11bff29045
commit 940efa95fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 7 deletions

View file

@ -49,9 +49,9 @@ inline bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
return eval_tokens(ctx_llama, tokens, 1, n_past);
}
inline bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past){
inline bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
std::string str2 = str;
std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, true);
std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos);
eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
return true;
}