Stop the generation when <|eom_id|> token is encountered - needed for Llama 3.1 tool call support (#8858)

* gguf-py, llama : add constants and methods related to Llama-3.1 <|eom_id|> token

* llama : find Llama-3.1 <|eom_id|> token id during vocab loading

* llama-vocab : add Llama-3.1 <|eom_id|> token to the set of tokens stopping the generation

---------

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
fairydreaming 2024-08-05 09:38:01 +02:00 committed by GitHub
parent e31a4f6797
commit d3f0c7166a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 27 additions and 1 deletions

View file

@ -161,6 +161,7 @@ class Keys:
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
EOT_ID = "tokenizer.ggml.eot_token_id"
EOM_ID = "tokenizer.ggml.eom_token_id"
class Adapter:
TYPE = "adapter.type"
@ -1327,3 +1328,4 @@ KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID

View file

@ -828,6 +828,9 @@ class GGUFWriter:
def add_eot_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
def add_eom_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOM_ID, id)
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
if not skip_pack_prefix: