Stop the generation when <|eom_id|> token is encountered - needed for Llama 3.1 tool call support (#8858)

* gguf-py, llama : add constants and methods related to Llama-3.1 <|eom_id|> token * llama : find Llama-3.1 <|eom_id|> token id during vocab loading * llama-vocab : add Llama-3.1 <|eom_id|> token to the set of tokens stopping the generation --------- Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
2024-08-05 09:38:01 +02:00 · 2024-08-05 09:38:01 +02:00 · d3f0c7166a
commit d3f0c7166a
parent e31a4f6797
5 changed files with 27 additions and 1 deletions
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@ -828,6 +828,9 @@ class GGUFWriter:
    def add_eot_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOT_ID, id)

+    def add_eom_token_id(self, id: int) -> None:
+        self.add_uint32(Keys.Tokenizer.EOM_ID, id)
+
    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        pack_prefix = ''
        if not skip_pack_prefix: