Ensure tgt and dft have same add_bos setting

This commit is contained in:
Branden Butler 2023-11-18 12:49:35 -06:00
parent e778ce4a4c
commit 9cfc5e2160

View file

@@ -96,11 +96,20 @@ int main(int argc, char ** argv) {
// Tokenize the prompt
const bool add_bos = llama_should_add_bos_token(model_tgt);
LOG("add_bos: %d\n", add_bos);
const bool add_bos_tgt = llama_should_add_bos_token(model_tgt);
LOG("add_bos tgt: %d\n", add_bos_tgt);
const bool add_bos_dft = llama_should_add_bos_token(model_dft);
LOG("add_bos dft: %d\n", add_bos_dft);
if (add_bos_tgt != add_bos_dft) {
fprintf(stderr, "%s: error: draft model add_bos must match target model to use speculation but ", __func__);
fprintf(stderr, "add_bos_dft = %d while add_bos_tgt = %d\n", add_bos_dft, add_bos_tgt);
return 1;
}
std::vector<llama_token> inp;
inp = ::llama_tokenize(ctx_tgt, params.prompt, add_bos, true);
inp = ::llama_tokenize(ctx_tgt, params.prompt, add_bos_tgt, true);
const int max_context_size = llama_n_ctx(ctx_tgt);
const int max_tokens_list_size = max_context_size - 4;