Stop the generation when <|eom_id|> token is encountered - needed for Llama 3.1 tool call support (#8858)

* gguf-py, llama : add constants and methods related to Llama-3.1 <|eom_id|> token

* llama : find Llama-3.1 <|eom_id|> token id during vocab loading

* llama-vocab : add Llama-3.1 <|eom_id|> token to the set of tokens stopping the generation

---------

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
fairydreaming 2024-08-05 09:38:01 +02:00 committed by GitHub
parent e31a4f6797
commit d3f0c7166a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 27 additions and 1 deletions

View file

@ -45,6 +45,7 @@ struct llama_vocab {
id special_suffix_id = -1;
id special_middle_id = -1;
id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
id special_eom_id = -1;
// tokenizer flags
bool tokenizer_add_space_prefix = false;
@ -101,6 +102,7 @@ llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
llama_token llama_token_eot_impl (const struct llama_vocab & vocab);
llama_token llama_token_eom_impl (const struct llama_vocab & vocab);
int32_t llama_tokenize_impl(
const struct llama_vocab & vocab,