From f22b2f2045f2cfcbd5389c51960ccc4ee4be76f8 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Fri, 10 Nov 2023 14:46:57 -0500 Subject: [PATCH] cleanup --- gguf-py/gguf/vocab.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py index b245a328b..88830d47e 100644 --- a/gguf-py/gguf/vocab.py +++ b/gguf-py/gguf/vocab.py @@ -31,7 +31,9 @@ class SpecialVocab: self._load(Path(path)) def __repr__(self) -> str: - return f'' + return ''.format( + len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset", + ) def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: if self.merges: @@ -39,8 +41,10 @@ class SpecialVocab: print(f'gguf: Adding {len(self.merges)} merge(s).') gw.add_token_merges(self.merges) elif self.load_merges: - print('gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.', - file = sys.stderr) + print( + 'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.', + file = sys.stderr, + ) for typ, tokid in self.special_token_ids.items(): handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) if handler is None: @@ -78,12 +82,14 @@ class SpecialVocab: for line in fp: line_num += 1 line = line.strip() - if len(line) == 0: + if not line: continue parts = line.split(None, 3) if len(parts) != 2: - print(f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring', - file = sys.stderr) + print( + f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring', + file = sys.stderr, + ) continue merges.append(f'{parts[0]} {parts[1]}') self.merges = merges