Add inverse chat template metadata
commit ebcbc45711
parent 1e43630218
3 changed files with 15 additions and 0 deletions
@@ -166,6 +166,7 @@ class Keys:
         CHAT_TEMPLATE = "tokenizer.chat_template"
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES = "tokenizer.chat_templates"
+        INVERSE_TEMPLATE = "tokenizer.inverse_template"
         # FIM/Infill special tokens constants
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
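For reference, a minimal sketch (not part of this diff) of how the new key can be reached through the gguf-py constants; it assumes the gguf Python package is importable:

# The new metadata key sits on Keys.Tokenizer next to the existing chat template keys.
from gguf.constants import Keys

print(Keys.Tokenizer.CHAT_TEMPLATE)     # tokenizer.chat_template
print(Keys.Tokenizer.INVERSE_TEMPLATE)  # tokenizer.inverse_template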
@@ -840,6 +840,9 @@ class GGUFWriter:
 
         self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
 
+    def add_inverse_template(self, value: str) -> None:
+        self.add_string(Keys.Tokenizer.INVERSE_TEMPLATE, value)
+
     def add_prefix_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
 
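A usage sketch for the new writer method, assuming the gguf Python package; the file name, architecture string, and both template strings below are made-up placeholders rather than values from this commit:

# Sketch: write GGUF metadata carrying both the chat template and its inverse.
from gguf import GGUFWriter

gw = GGUFWriter("out.gguf", arch="llama")
gw.add_chat_template("{% for message in messages %}...{% endfor %}")
gw.add_inverse_template("{# hypothetical template mapping rendered text back to messages #}")
# ... add the rest of the model/tokenizer metadata and tensors as usual, then:
gw.write_header_to_file()
gw.write_kv_data_to_file()
gw.write_tensors_to_file()
gw.close()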
@@ -21,6 +21,7 @@ class SpecialVocab:
     add_special_token: dict[str, bool]
     special_token_ids: dict[str, int]
     chat_template: str | Sequence[Mapping[str, str]] | None
+    inverse_template: str | None
 
     def __init__(
         self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -33,6 +34,7 @@ class SpecialVocab:
         self.load_merges = load_merges
         self.merges = []
         self.chat_template = None
+        self.inverse_template = None
         if special_token_types is not None:
             self.special_token_types = special_token_types
         else:
@@ -71,6 +73,10 @@ class SpecialVocab:
             if not quiet:
                 logger.info(f'Setting chat_template to {self.chat_template}')
             gw.add_chat_template(self.chat_template)
+        if self.inverse_template is not None:
+            if not quiet:
+                logger.info(f'Setting inverse_template to {self.inverse_template}')
+            gw.add_inverse_template(self.inverse_template)
 
     def _load(self, path: Path) -> None:
         self._try_load_from_tokenizer_json(path)
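As an end-to-end sketch of this path (the model directory below is a placeholder), SpecialVocab picks the field up from the model's tokenizer config and the block above forwards it to the writer:

# Sketch: "path/to/model" stands in for a local HF model directory.
import gguf

gw = gguf.GGUFWriter("out.gguf", arch="llama")
sv = gguf.SpecialVocab("path/to/model", load_merges=True)
sv.add_to_gguf(gw)  # emits chat_template and, when present, inverse_template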
@@ -137,6 +143,11 @@ class SpecialVocab:
             self.chat_template = chat_template
         else:
             logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
+        inverse_template = tokenizer_config.get('inverse_template')
+        if inverse_template is None or isinstance(inverse_template, str):
+            self.inverse_template = inverse_template
+        else:
+            logger.warning(f'Bad type for inverse_template field in {tokenizer_config_file!r} - ignoring')
         for typ in self.special_token_types:
             add_entry = tokenizer_config.get(f'add_{typ}_token')
             if isinstance(add_entry, bool):
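For illustration, the loader above expects inverse_template to sit alongside chat_template in tokenizer_config.json and to be a plain string; a hypothetical minimal config, shown as the dict json.load() would return (both template strings are placeholders, not from any real model):

# Hypothetical tokenizer_config.json content after json.load().
tokenizer_config = {
    "chat_template": "{% for message in messages %}...{% endfor %}",
    "inverse_template": "{# hypothetical template that parses rendered text back into messages #}",
}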