Stop the generation when <|eom_id|> token is encountered - needed for Llama 3.1 tool call support (#8858)
* gguf-py, llama : add constants and methods related to Llama-3.1 <|eom_id|> token
* llama : find Llama-3.1 <|eom_id|> token id during vocab loading
* llama-vocab : add Llama-3.1 <|eom_id|> token to the set of tokens stopping the generation

---------

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
parent
e31a4f6797
commit
d3f0c7166a
5 changed files with 27 additions and 1 deletions
|
@@ -828,6 +828,9 @@ class GGUFWriter:
|
|||
def add_eot_token_id(self, id: int) -> None:
|
||||
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
||||
|
||||
def add_eom_token_id(self, id: int) -> None:
|
||||
self.add_uint32(Keys.Tokenizer.EOM_ID, id)
|
||||
|
||||
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
|
||||
pack_prefix = ''
|
||||
if not skip_pack_prefix:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue