convert-hf-to-gguf.py: print() --> logger

This commit is contained in:
brian khuu 2024-04-24 22:50:34 +10:00
parent 2d2bc99385
commit 5e5e74e3b8

View file

@@ -441,9 +441,7 @@ class Model(ABC):
 if vocab_size > len(tokens):
     pad_count = vocab_size - len(tokens)
-    print(
-        f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]"
-    )
+    logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
     for i in range(1, pad_count + 1):
         tokens.append(f"[PAD{i}]")
         scores.append(-1000.0)
@@ -2065,8 +2063,7 @@ class Phi3MiniModel(Model):
 tokenizer_path = self.dir_model / 'tokenizer.model'
 if not tokenizer_path.is_file():
-    print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
-    sys.exit(1)
+    raise ValueError(f'Error: Missing {tokenizer_path}')
 tokenizer = SentencePieceProcessor(str(tokenizer_path))
@@ -2104,7 +2101,7 @@ class Phi3MiniModel(Model):
 for key in added_tokens_json:
     token_id = added_tokens_json[key]
     if (token_id >= vocab_size):
-        print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+        logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
         continue
     tokens[token_id] = key.encode("utf-8")